######################
# Replication Survey Analyses
# "Race, Prejudice and Support for Racial Justice Countermovements: 
# The Case of “Blue Lives Matter”
# Newman, Merolla, Reny 2023
# Political Behavior
# For questions please contact Tyler Reny
# ttreny@gmail.com
######################

setwd('~/replication_file')

rm(list=ls())

pacman::p_load(tidyverse, simcf, MASS, 
               ggpubr, haven, pscl)
# simcf is not on CRAN and needs to be downloaded  
# though analyses could be closely replicated using another package like
# marginaleffects on CRAN

# rescale function
rescale01 <- function(x){
  minx <- min(x, na.rm=T)
  maxx <- max(x, na.rm=T)
  x <- (x - minx) / (maxx - minx)
  return(x)
}

# read in data
washu <- read_dta('surveydata/washu.dta', encoding = 'latin1')

# model forms
form1 <- blueLM_01 ~  white
form2 <- blueLM_01 ~  white + ofr_01 + education_01 + income6D2 + income6D3 + 
  income6D4 + income6D5 + income6D6 + age + male + partyid_01 + ideology_01

# model objects
m1 <- lm(form1, data=washu)
m2 <- lm(form2, data=washu)

# simulate betas
set.seed(10000)
simb1 <- mvrnorm(10000, coef(m1), vcov(m1))
simb2 <- mvrnorm(10000, coef(m2), vcov(m2))

# model matrices
dat1 <- extractdata(form1, washu, na.rm=T)
dat2 <- extractdata(form2, washu, na.rm=T)
mm1 <- cfMake(form1, dat1, nscen = 1)
mm2 <- cfMake(form2, dat2, nscen = 1)
mm1 <- cfChange(mm1, 'white', xpre=0, x=1, scen=1)
mm2 <- cfChange(mm2, 'white', xpre=0, x=1, scen=1)

# calculate yhats
mm1 <- linearsimfd(mm1, simb1) %>%
  bind_rows() %>%
  mutate(model = 'White\n(Bivariate)',Sample='Lucid\nJan 2021')
mm2 <- linearsimfd(mm2, simb2) %>%
  bind_rows() %>%
  mutate(model = 'White\n(+ Controls)',Sample='Lucid\nJan 2021')
washusample <- bind_rows(mm1, mm2)

# magnitude
washusample$pe[1]/sd(washu$blueLM_01[washu$white==1],na.rm=T)
washusample$pe[2]/sd(washu$blueLM_01[washu$white==1],na.rm=T)

# save results
s1m1 <- m1
s1m2 <- m2

# survey 2
lucid <- read_dta('surveydata/lucid.dta', encoding = 'latin1')

# formulas
form1 <- blueLM_01 ~  white
form2 <- blueLM_01 ~  white + ofr_01 + education_01 + income6D2 + income6D3 + income6D4 + income6D5 + income6D6 + age + male + partyid_01 + ideology_01

# models
m1 <- lm(form1, lucid)
m2 <- lm(form2, lucid)

# simulate betas
set.seed(10000)
simb1 <- mvrnorm(10000, coef(m1), vcov(m1))
simb2 <- mvrnorm(10000, coef(m2), vcov(m2))

# model matrices
dat1 <- extractdata(form1, lucid, na.rm=T)
dat2 <- extractdata(form2, lucid, na.rm=T)
mm1 <- cfMake(form1, dat1, nscen = 1)
mm2 <- cfMake(form2, dat2, nscen = 1)
mm1 <- cfChange(mm1, 'white', xpre=0, x=1, scen=1)
mm2 <- cfChange(mm2, 'white', xpre=0, x=1, scen=1)

# yhats
mm1 <- linearsimfd(mm1, simb1) %>%
  bind_rows() %>%
  mutate(model = 'White\n(Bivariate)',Sample='Lucid\nFeb 2021')
mm2 <- linearsimfd(mm2, simb2) %>%
  bind_rows() %>%
  mutate(model = 'White\n(+ Controls)',Sample='Lucid\nFeb 2021')
lucidsample <- bind_rows(mm1, mm2)

# descriptives
lucidsample$pe[1]/sd(lucid$blueLM_01[lucid$white==1],na.rm=T)
lucidsample$pe[2]/sd(lucid$blueLM_01[lucid$white==1],na.rm=T)

# make plots
whiteplot <- bind_rows(washusample, lucidsample) %>%
  mutate(Sample=factor(Sample, levels=rev(c(
    'Lucid\nJan 2021',
    'Lucid\nFeb 2021'
  )))) %>%
  ggplot(aes(x=model, y=pe, ymin=lower, ymax=upper, shape=Sample)) +
  geom_errorbar(width=0.1, position=position_dodge(width=0.2)) +
  geom_point(position=position_dodge(width=0.2),size=3,fill='white') +
  coord_flip() + 
  geom_hline(yintercept=0,linetype=2, color='grey') + 
  theme_bw()  +
  labs(x='',y='Change Predicted Support BlueLM\nWhite vs. Non-White') +
  scale_shape_manual(values=c(21,24), breaks=c('Lucid\nJan 2021', 'Lucid\nFeb 2021'))
whiteplot

# store results
s2m1 <- m1
s2m2 <- m2

# create reg tables
stargazer::stargazer(s1m1, s1m2, s2m1, s2m2,
                     type='text',
                     covariate.labels = c(
                     'White',
                      'Old-Fashioned Racism','Education','Income Q2',
                     'Income Q3','Income Q4','Income Q5','Income Q6',
                     'Age','Male','Party ID (R)','Ideology (C)','Intercept'
                     ),dep.var.labels = c('Support Blue Lives Matter'),
                     star.cutoffs = c(0.05,0.01,0.001), 
                     out ='figure1A.html')

# flag sales analysis plots
# to create the csv for this plot, run model_flag_data.R file which will
# produce this pred_probs_flags_model.csv 
flag <- read_csv('plotdata/pred_probs_flags_model.csv')
rugdat <- read_stata('flagdata/Anley Flag Sales_Zip Level Data.dta')

set.seed(10000)
rugdat <- rugdat %>% dplyr::select(sale4x6, pwht19zip_01) %>%
  sample_n(1000)

flagplot <- flag %>%
  ggplot(aes(x=x, y=y, ymin=l, ymax=u)) +
  geom_ribbon(alpha=0.2) +
  geom_line() +
  theme_bw()  +
  geom_rug(data=rugdat, aes(y=sale4x6, x=pwht19zip_01), inherit.aes = F,sides = 'b') +
  scale_y_continuous(limits=c(0.1,0.70)) +
  labs(x='Pct White (Zipcode)',y='Predicted Count\nZipcode Flag Sales ') 

flag$y[1]*10 # to
flag$y[20]*10 # to

plots <- ggarrange(whiteplot, flagplot,
                   heights = c(6,6),
                   widths = c(8,8),
                   ncol = 2, nrow = 1)
plots
ggsave(plots, width=8, height=4,  
       filename = 'whiteness_figure.pdf')


#####
# Prejudice analysis
#####

# lucid data
m1 <- lm(blueLM_01 ~  ofr_01 + education_01 + income6D2 + income6D3 + income6D4 + income6D5 + income6D6 + age + male + partyid_01 + ideology_01, 
         data=lucid[lucid$white==1,])
m2 <- lm(blueLM_01 ~  generations_01 + education_01 + income6D2 + income6D3 + income6D4 + income6D5 + income6D6 + age + male + partyid_01 + ideology_01, 
         data=lucid[lucid$white==1,])

# draw betas
set.seed(10000)
simb1 <- mvrnorm(10000, coef(m1), vcov(m1))
simb2 <- mvrnorm(10000, coef(m2), vcov(m2))

# formulas
form1 <- blueLM_01 ~ 
  ofr_01 + education_01 + income6D2 + income6D3 + income6D4 + income6D5 + income6D6 + 
  age + male + partyid_01 + ideology_01
form2 <- blueLM_01 ~ 
  generations_01 + education_01 + income6D2 + income6D3 + income6D4 + income6D5 + income6D6 + 
  age + male + partyid_01 + ideology_01

# make model matrices
dat1 <- extractdata(form1, lucid[lucid$white==1,], na.rm=T)
dat2 <- extractdata(form2, lucid[lucid$white==1,], na.rm=T)
mm1 <- cfMake(form1, dat1, nscen = 1)
mm2 <- cfMake(form2, dat2, nscen = 1)
mm1 <- cfChange(mm1, 'ofr_01', xpre=0, x=1, scen=1)
mm2 <- cfChange(mm2, 'generations_01', xpre=0, x=1, scen=1)

# yhats
mm1 <- linearsimfd(mm1, simb1) %>%
  bind_rows() %>%
  mutate(IV='Old-Fashioned\nRacism', sample='Lucid\nFeb 2021')
mm2 <- linearsimfd(mm2, simb2) %>%
  bind_rows() %>%
  mutate(IV='Racial\nResentment', sample='Lucid\nFeb 2021')

lucid1 <- bind_rows(mm1, mm2)

# save results
s1m1 <- m1
s1m2 <- m2

# magnitude
lucid1$pe[1]/sd(lucid$blueLM_01[lucid$white==1],na.rm=T)
lucid1$pe[2]/sd(lucid$blueLM_01[lucid$white==1],na.rm=T)

# read in washu data again
washu <- read_dta('surveydata/washu.dta',encoding = 'latin1')

# formulas
form1 <- blueLM_01 ~ 
  ofr_01 + education_01 + income6D2 + income6D3 + income6D4 + income6D5 + income6D6 + 
  age + male + partyid_01 + ideology_01
form2 <- blueLM_01 ~ 
  generations_01 + education_01 + income6D2 + income6D3 + income6D4 + income6D5 + income6D6 + 
  age + male + partyid_01 + ideology_01

# models
m1 <- lm(form1,  data=washu[washu$white==1,])
m2 <- lm(form2,  data=washu[washu$white==1,])

# draw betas
set.seed(10000)
simb1 <- mvrnorm(10000, coef(m1), vcov(m1))
simb2 <- mvrnorm(10000, coef(m2), vcov(m2))

# model matrices
dat1 <- extractdata(form1, washu[washu$white==1,], na.rm=T)
dat2 <- extractdata(form2, washu[washu$white==1,], na.rm=T)
mm1 <- cfMake(form1, dat1, nscen = 1)
mm2 <- cfMake(form2, dat2, nscen = 1)
mm1 <- cfChange(mm1, 'ofr_01', xpre=0, x=1, scen=1)
mm2 <- cfChange(mm2, 'generations_01', xpre=0, x=1, scen=1)

# yhats
mm1 <- linearsimfd(mm1, simb1) %>%
  bind_rows() %>%
  mutate(IV='Old-Fashioned\nRacism', sample='Lucid\nJan 2021')
mm2 <- linearsimfd(mm2, simb2) %>%
  bind_rows() %>%
  mutate(IV='Racial\nResentment', sample='Lucid\nJan 2021')
lucid2 <- bind_rows(mm1, mm2)

# save results
s2m1 <- m1
s2m2 <- m2

# magnitude
lucid2$pe[1]/sd(washu$blueLM_01[washu$white==1],na.rm=T)
lucid2$pe[2]/sd(washu$blueLM_01[washu$white==1],na.rm=T)

# third survey
yougov <- read_dta('surveydata/yougov.dta', encoding = 'latin1')
yougov$partyid_01 <- yougov$pid7_01

# formula
form1 <-blueLM_01 ~ RRscale_01 + education_01 + income6D2 + income6D3 + income6D4 + income6D5 + income6D6 + 
  age + male + partyid_01 + ideology_01

# run model
m1 <- lm(form1, data=yougov)

# simulated betas
set.seed(10000)
simb1 <- mvrnorm(10000, coef(m1), vcov(m1))

# model matrices
dat1 <- extractdata(form1, yougov, na.rm=T)
mm1 <- cfMake(form1, dat1, nscen = 1)
mm1 <- cfChange(mm1, 'RRscale_01', xpre=0, x=1, scen=1)

# yhats
mm1 <- linearsimfd(mm1, simb1) %>%
  bind_rows() %>%
  mutate(IV='Racial\nResentment', sample='YouGov\nDec 2020')

# magnitude
mm1$pe/sd(yougov$blueLM_01,na.rm=T)

# bind all
all <- bind_rows(lucid1, lucid2, mm1) 

prejudice <- all %>%
  mutate(sample=factor(sample, levels=rev(c(
    'YouGov\nDec 2020',
    'Lucid\nJan 2021',
    'Lucid\nFeb 2021'
  )))) %>%
  ggplot(aes(x=sample, y=pe, ymin=lower, ymax=upper, shape=IV)) +
  geom_errorbar(width=0.1, position=position_dodge(width=0.2)) +
  geom_point(position=position_dodge(width=0.2),size=2,fill='white') +
  coord_flip() + 
  geom_hline(yintercept=0,linetype=2, color='grey') + 
  theme_bw()  +
  theme(legend.text = element_text(margin = margin(t = 5))) + 
  labs(x='',y='Change Predicted Support\nBlue Lives Matter') +
  scale_shape_manual(values=c(21,24),name='', breaks=c(
    'Racial\nResentment','Old-Fashioned\nRacism'
  )) 
prejudice

s3m1 <- m1

# table
stargazer::stargazer(s3m1, s1m1, s1m2, s2m1, s2m2,
                     type='text',
                     covariate.labels = c(
                       'Racial Resentment Scale','Old-Fashioned Racism','Generations (RR)',
                       'Education','Income Q2','Income Q3','Income Q4','Income Q5','Income Q6',
                       'Age','Male','Party ID (R)','Ideology (C)','Intercept'
                     ),dep.var.label = c('Support Blue Lives Matter'),
                     column.labels = c('YouGov','Lucid S1','Lucid S1','Lucid S2','Lucid S2'),
                     star.cutoffs = c(0.05,0.01,0.001), 
                     out ='figure2A.html')

# twitter data

out <- read_csv('twitterdata/fulltwit.csv')
subdf <- read_csv('twitterdata/subdf.csv')

# analyses
summary(lm1 <- lm(log(tweets_percap) ~ resent + pwht610 +  popden610 + pcollege610 + medinc610 + p65older610 + pprotect610, subdf))
summary(lm2 <- lm(log(positive_tweets_percap) ~ resent + pwht610 +  popden610 + pcollege610 + medinc610 + p65older610 + pprotect610, subdf))
summary(lm3 <- lm(log(joy_tweets_percap) ~ resent + pwht610 +  popden610 + pcollege610 + medinc610 + p65older610 + pprotect610, subdf))
summary(lm5 <- lm(log(tweets_percap) ~ anti_immigr + pwht610 + popden610 + pcollege610 + medinc610 + p65older610 + pprotect610, subdf))
summary(lm6 <- glm.nb(n~resent + pwht610 + popden610 + pcollege610 + medinc610 + p65older610 +  pprotect610 + I(log(pop)),
                      data=subdf,control = glm.control(maxit=500)))

summary(lm7 <- lm(log(tweets_percap+1)~ resent + pwht610 + popden610 + pcollege610 + medinc610 + p65older610 + pprotect610, out))
summary(lm8 <- pscl::zeroinfl(n ~ resent + pwht610 + popden610 + pcollege610 + p65older610 + medinc610 + pprotect610 + I(log(pop)) |
                          resent + pwht610 + popden610 + pcollege610 + p65older610 + medinc610,
                        data=out, dist='negbin'))

stargazer::stargazer(lm1, lm2, lm3, lm5, lm6, lm7, 
          type='text',style = 'APSR',
          covariate.labels = c('White Racial Resentment 10-14',
                               'White Immigrant Attitudes 10-12','Pct White 06-10','ln(Pop Density)',
                               'Pct College 06-10', 'Med Income 06-10','Pct 65+ 06-10','Pct Protective Serv 06-10',
                               'ln(Population)', 'Intercept'),
          dep.var.labels = c('ln(Tweets/1000)','ln(Positive/1000)','ln(Joy/1000)','ln(Tweets/1000)',
                             'N Tweets','ln(Tweets/1000)','N Tweets'),
          star.cutoffs = c(0.05,0.01,0.001),
          out='tableb4.html')

# add in last model by hand
texreg::screenreg(list(lm8))

# pred probs
summary(lm1 <- lm(log(tweets_percap) ~ resent + pwht610 +  popden610 + pcollege610 + medinc610 + p65older610 + pprotect610, subdf))
summary(lm2 <- lm(log(tweets_percap) ~ mrp_estimated_ofr + pwht610 +  popden610 + pcollege610 + medinc610 + p65older610 + pprotect610, subdf))

gen_pp <- function(model, formula){
  set.seed(10000)
  simbetas <- MASS::mvrnorm(10000, coef(model), vcov(model))
  mm <- cfMake(formula,
               data=subdf, nscen=10)
  mm <- cfChange(mm, 'resent',x=seq(min(subdf$resent, na.rm=T), max(subdf$resent, na.rm=T), length.out=10), scen=1:10)
  ppres <- linearsimev(mm, simbetas, ci=0.9)
  ppres <- data.frame(x=seq(min(subdf$resent, na.rm=T), max(subdf$resent, na.rm=T), length.out=10),
             y=exp(ppres$pe),
             l=exp(ppres$lower),
             u=exp(ppres$upper))
  return(ppres)
}

gen_pp2 <- function(model, formula){
  set.seed(10000)
  simbetas <- MASS::mvrnorm(10000, coef(model), vcov(model))
  mm <- cfMake(formula,
               data=subdf, nscen=10)
  mm <- cfChange(mm, 'mrp_estimated_ofr',x=seq(min(subdf$mrp_estimated_ofr, na.rm=T), max(subdf$mrp_estimated_ofr, na.rm=T), length.out=10), scen=1:10)
  ppres <- linearsimev(mm, simbetas, ci=0.9)
  ppres <- data.frame(x=seq(min(subdf$mrp_estimated_ofr, na.rm=T), max(subdf$mrp_estimated_ofr, na.rm=T), length.out=10),
                      y=exp(ppres$pe),
                      l=exp(ppres$lower),
                      u=exp(ppres$upper))
  return(ppres)
}

all <- gen_pp(lm1, log(tweets_percap) ~ resent + pwht610 +  popden610 + pcollege610 + medinc610 + p65older610 + pprotect610) %>%
  mutate(iv='Racial Resentment')
all2 <- gen_pp2(lm2, log(tweets_percap) ~ mrp_estimated_ofr + pwht610 +  popden610 + pcollege610 + medinc610 + p65older610 + pprotect610)%>%
  mutate(iv='Old-Fashioned Racism')

subdf$dummy <- 4

tweets <- bind_rows(all) %>%
  ggplot(aes(x, y, ymin=l, ymax=u)) +
  geom_ribbon(alpha=0.5) +
  geom_line() +
  scale_y_continuous(trans='log', breaks=seq(1,25, by=2)) +
  theme_bw() +
  labs(x='White Prejudice\n(County)', y='Predicted\n#bluelivesmatter Tweets\nper 1000 residents') +
  geom_rug(data=subdf,aes(x=resent,y=dummy),
           inherit.aes = FALSE, sides='b') +
  coord_cartesian(clip = 'off') +
  theme(axis.text.y = element_text(size=5)) 


plots <- ggarrange(prejudice, tweets,
                   heights = c(4,4),
                   widths = c(8,8),
                   ncol = 2, nrow = 1)
plots
ggsave(plots, width=9, height=3,  filename = 'prejudice_figure2.png')
  



